{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "打开train与test文件，找出所需的eventid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "import itertools\n",
    "\n",
    "#处理事件字符串\n",
    "import datetime\n",
    "\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 方式一：分别读取训练集（train.csv）与测试集（test.csv）中的 event id"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events count: 15398\n"
     ]
    }
   ],
   "source": [
    "# Collect, for every data row of train.csv, the event id (cols[1]) and the\n",
    "# label column (cols[4]); the two lists stay index-aligned.\n",
    "trainUserEvents = list()\n",
    "trainUserY = list()\n",
    "for filename in [\"train.csv\"]:\n",
    "    # `with` closes the handle even if a malformed row raises mid-loop.\n",
    "    with open(filename, 'rb') as f:\n",
    "        f.readline()  # skip the header row\n",
    "        for line in f:\n",
    "            cols = line.decode().strip().split(\",\")\n",
    "            trainUserEvents.append(cols[1])\n",
    "            trainUserY.append(cols[4])\n",
    "\n",
    "n_count = len(trainUserEvents)\n",
    "\n",
    "print(\"number of events count: %d\" % n_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events count: 10237\n",
      "3025444328\n"
     ]
    }
   ],
   "source": [
    "# Collect the event id (cols[1]) of every data row of test.csv.\n",
    "testUserEvents = list()\n",
    "for filename in [\"test.csv\"]:\n",
    "    # `with` closes the handle even if a malformed row raises mid-loop.\n",
    "    with open(filename, 'rb') as f:\n",
    "        f.readline()  # skip the header row\n",
    "        for line in f:\n",
    "            cols = line.decode().strip().split(\",\")\n",
    "            testUserEvents.append(cols[1])\n",
    "\n",
    "n_count = len(testUserEvents)\n",
    "\n",
    "print(\"number of events count: %d\" % n_count)\n",
    "print(testUserEvents[1])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 方式二：题目要求把训练集与测试集的事件合并后统一做聚类（用 set 保存 event id，自动去重，后续成员判断也比 list 快很多）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of events count: 13418\n"
     ]
    }
   ],
   "source": [
    "# Union of the event ids (cols[1]) that occur in train.csv or test.csv.\n",
    "# A set de-duplicates ids and gives O(1) membership tests later on.\n",
    "thisTrain = set()\n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    # `with` closes each handle even if a malformed row raises mid-loop.\n",
    "    with open(filename, 'rb') as f:\n",
    "        f.readline()  # skip the header row\n",
    "        for line in f:\n",
    "            cols = line.decode().strip().split(\",\")\n",
    "            thisTrain.add(cols[1])\n",
    "\n",
    "n_count = len(thisTrain)\n",
    "\n",
    "print(\"number of events count: %d\" % n_count)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读取events的csv，并筛选数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 分开的做法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics\n",
    "\n",
    "from sklearn.decomposition import PCA\n",
    "\n",
    "import matplotlib.pyplot as plt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write train_events.csv: the events.csv rows whose event id occurs in\n",
    "# train.csv, each with that event's label appended as an extra\n",
    "# \"interested\" column.\n",
    "#\n",
    "# Map every event id to the label of its FIRST occurrence in train.csv. That\n",
    "# is the value trainUserY[trainUserEvents.index(key)] yields, but a dict\n",
    "# lookup is O(1) instead of a list scan for every events.csv row.\n",
    "firstLabelByEvent = {}\n",
    "for eventId, label in zip(trainUserEvents, trainUserY):\n",
    "    firstLabelByEvent.setdefault(eventId, label)\n",
    "\n",
    "# Input is decoded as UTF-8, so the output is written as UTF-8 too instead of\n",
    "# the platform default; `with` closes both files even on error.\n",
    "with open(\"events.csv\", 'rb') as ef, open('train_events.csv', 'w', encoding='utf-8') as out:\n",
    "    title = ef.readline().decode().strip()\n",
    "    out.write(title)\n",
    "    out.write(\",interested\")\n",
    "    out.write('\\n')\n",
    "    for line in ef:\n",
    "        row = line.decode().strip()\n",
    "        key = row.split(\",\")[0]  # first column is matched against the train ids\n",
    "        if key in firstLabelByEvent:\n",
    "            out.write(row)\n",
    "            out.write(\",\")\n",
    "            out.write(firstLabelByEvent[key])\n",
    "            out.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write test_events.csv: the events.csv rows whose event id occurs in test.csv.\n",
    "# Membership is tested against a set so each lookup is O(1) rather than a\n",
    "# scan of the whole testUserEvents list for every events.csv row.\n",
    "testEventIds = set(testUserEvents)\n",
    "\n",
    "# Input is decoded as UTF-8, so the output is written as UTF-8 too instead of\n",
    "# the platform default; `with` closes both files even on error.\n",
    "with open(\"events.csv\", 'rb') as ef, open('test_events.csv', 'w', encoding='utf-8') as out:\n",
    "    title = ef.readline().decode().strip()\n",
    "    out.write(title)\n",
    "    out.write('\\n')\n",
    "    for line in ef:\n",
    "        row = line.decode().strip()\n",
    "        key = row.split(\",\")[0]  # first column is matched against the test ids\n",
    "        if key in testEventIds:\n",
    "            out.write(row)\n",
    "            out.write('\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 题目要求训练数据与测试数据一起做聚类"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write this_events.csv: the events.csv rows whose event id is in thisTrain\n",
    "# (the combined train + test id set).\n",
    "# Input is decoded as UTF-8, so the output is written as UTF-8 too instead of\n",
    "# the platform default; `with` closes both files even on error.\n",
    "with open(\"events.csv\", 'rb') as ef, open('this_events.csv', 'w', encoding='utf-8') as out:\n",
    "    title = ef.readline().decode().strip()\n",
    "    out.write(title)\n",
    "    out.write('\\n')\n",
    "    for line in ef:\n",
    "        row = line.decode().strip()\n",
    "        key = row.split(\",\")[0]  # first column is matched against thisTrain\n",
    "        if key in thisTrain:\n",
    "            out.write(row)\n",
    "            out.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "eventTrain = pd.read_csv(\"this_events.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time city state  zip country  \\\n",
       "0   684921758  3647864012  2012-10-31T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "1   244999119  3476440521  2012-11-03T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "2  3928440935   517514445  2012-11-05T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "3  2582345152   781585781  2012-10-30T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "4  1051165850  1016098580  2012-09-27T00:00:00.001Z  NaN   NaN  NaN     NaN   \n",
       "\n",
       "   lat  lng  c_1   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  c_99  \\\n",
       "0  NaN  NaN    2   ...        0     1     0     0     0     0     0     0   \n",
       "1  NaN  NaN    2   ...        0     0     0     0     0     0     0     0   \n",
       "2  NaN  NaN    0   ...        0     0     0     0     0     0     0     0   \n",
       "3  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "4  NaN  NaN    1   ...        0     0     0     0     0     0     0     0   \n",
       "\n",
       "   c_100  c_other  \n",
       "0      0        9  \n",
       "1      0        7  \n",
       "2      0       12  \n",
       "3      0        8  \n",
       "4      0        9  \n",
       "\n",
       "[5 rows x 110 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventTrain.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 13418 entries, 0 to 13417\n",
      "Columns: 110 entries, event_id to c_other\n",
      "dtypes: float64(2), int64(103), object(5)\n",
      "memory usage: 11.3+ MB\n"
     ]
    }
   ],
   "source": [
    "eventTrain.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the id, time and location columns so that only the remaining columns\n",
    "# are used as clustering features.\n",
    "nonFeatureColumns = [\n",
    "    \"event_id\", \"user_id\", \"start_time\",\n",
    "    \"city\", \"state\", \"zip\", \"country\",\n",
    "    \"lat\", \"lng\",\n",
    "]\n",
    "X_train = eventTrain.drop(columns=nonFeatureColumns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 13418 entries, 0 to 13417\n",
      "Columns: 101 entries, c_1 to c_other\n",
      "dtypes: int64(101)\n",
      "memory usage: 10.3 MB\n"
     ]
    }
   ],
   "source": [
    "X_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate one hyper-parameter point: a MiniBatchKMeans model with K clusters.\n",
    "import time\n",
    "\n",
    "\n",
    "def K_cluster_analysis(K, X_train, random_state=None):\n",
    "    \"\"\"Fit MiniBatchKMeans with K clusters and score the resulting clustering.\n",
    "\n",
    "    Despite the ``CH_score`` name used for the local variable and in the\n",
    "    printed message, the value is the silhouette coefficient\n",
    "    (``metrics.silhouette_score``). It is computed on ``X_train`` itself, the\n",
    "    same data the model was fitted on, not on a held-out validation set.\n",
    "\n",
    "    Args:\n",
    "        K: number of clusters passed to ``MiniBatchKMeans``.\n",
    "        X_train: feature matrix to cluster and to score.\n",
    "        random_state: optional seed forwarded to ``MiniBatchKMeans`` so a run\n",
    "            can be reproduced; the default ``None`` keeps the unseeded\n",
    "            behaviour.\n",
    "\n",
    "    Returns:\n",
    "        The silhouette score of the cluster labels predicted for ``X_train``.\n",
    "    \"\"\"\n",
    "    start = time.time()\n",
    "\n",
    "    print(\"K-means begin with clusters: {}\".format(K))\n",
    "\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters=K, random_state=random_state)\n",
    "    mb_kmeans.fit(X_train)\n",
    "\n",
    "    CH_score = metrics.silhouette_score(X_train, mb_kmeans.predict(X_train))\n",
    "\n",
    "    end = time.time()\n",
    "    # Elapsed wall-clock time covers both fitting and scoring.\n",
    "    print(\"CH_score: {}, time elaps:{}\".format(CH_score, int(end - start)))\n",
    "\n",
    "    return CH_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 10\n",
      "CH_score: 0.3941314935352942, time elaps:6\n",
      "K-means begin with clusters: 20\n",
      "CH_score: 0.28718918437041385, time elaps:3\n",
      "K-means begin with clusters: 30\n",
      "CH_score: 0.1659202373017241, time elaps:3\n",
      "K-means begin with clusters: 40\n",
      "CH_score: 0.2019485914742695, time elaps:3\n",
      "K-means begin with clusters: 50\n",
      "CH_score: 0.16350471829564436, time elaps:3\n",
      "K-means begin with clusters: 60\n",
      "CH_score: 0.08807584738423874, time elaps:3\n",
      "K-means begin with clusters: 70\n",
      "CH_score: 0.11080205668835072, time elaps:3\n",
      "K-means begin with clusters: 80\n",
      "CH_score: 0.094853768526612, time elaps:3\n",
      "K-means begin with clusters: 90\n",
      "CH_score: 0.09683988505113453, time elaps:3\n",
      "K-means begin with clusters: 100\n",
      "CH_score: 0.08458363096621882, time elaps:3\n"
     ]
    }
   ],
   "source": [
    "# Sweep the cluster count from 10 to 100 in steps of 10, keeping one score per K.\n",
    "Ks = list(range(10, 101, 10))\n",
    "CH_scores = []\n",
    "for K in Ks:\n",
    "    CH_scores.append(K_cluster_analysis(K, X_train))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x7f15edda0748>]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJzt3XmYFNXZ/vHvA8gmxiCQF9kcUDQiIGBLFNwXghu4EANuxCUEXxCIZsEIYnAJKiqioCDiQhQUoxE1ymvcl6gMArIIYREFXBiEqJF14Pn9cXp+NOPA9Mz0dPV035/r6mumq6q7n2mau06fOnXK3B0REckN1aIuQERE0kehLyKSQxT6IiI5RKEvIpJDFPoiIjlEoS8ikkMU+iIiOUShLyKSQxT6IiI5pEbUBRTXsGFDz8vLi7oMEZEqZfbs2evcvVFp22Vc6Ofl5ZGfnx91GSIiVYqZfZrMdureERHJIUmFvpl1N7MlZrbMzIbuYbvzzMzNLJaw7Nr445aY2c9TUbSIiJRPqd07ZlYdGAecCqwGZpnZDHdfVGy7fYDBwPsJy9oAvYHDgCbAP83sYHffnro/QUREkpVMS78zsMzdV7j7VmAa0LOE7W4EbgU2JyzrCUxz9y3u/gmwLP58IiISgWRCvymwKuH+6viy/8/MOgHN3f2Fsj42/vh+ZpZvZvkFBQVJFS4iImVX4QO5ZlYNuBO4przP4e4T3T3m7rFGjUodcSQiIuWUzJDNNUDzhPvN4suK7AO0BV43M4DGwAwz65HEY0VEJI2SaenPAlqbWUszq0k4MDujaKW7f+PuDd09z93zgPeAHu6eH9+ut5nVMrOWQGvgg5T/FcA338Dw4bBkSWU8u4hIdii1pe/uhWY2EJgJVAcmu/tCMxsJ5Lv7jD08dqGZPQksAgqBAZU1cmfzZrjjDvj0U3j00cp4BRGRqs8y7cLosVjMy3tG7jXXwJgxsHgxtG6d4sJERDKYmc1291hp22XVGbm//z3UrAm33BJ1JSIimSmrQr9xY+jfH6ZMgeXLo65GRCTzZFXoQ2jt16ih1r6ISEmyLvSbNIF+/cLB3E8+iboaEZHMknWhD/DHP0K1avCXv0RdiYhIZsnK0G/aFH79a3jooTCEU0REgqwMfYChQ9XaFxEpLmtDv1kzuOwymDwZPvss6mpERDJD1oY+wLXXhp+jRkVbh4hIpsjq0G/RAi69FB58EFavjroaEZHoZXXoQ2jt79gBt94adSUiItHL+tDPy4O+feGBB+Dzz6OuRkQkWlkf+gB/+hMUFqq1LyKSE6HfqhVccglMnAhffBF1NSIi0cmJ0Ae47jrYtg1uvz3qSkREopMzoX/ggXDhhXD//fDVV1FXIyISjZwJfYBhw2DLFrX2RSR35VTot24NF1wA48fD2rVRVyMikn45FfoQWvtF19MVEck1ORf6hxwCvXvDuHGwbl3U1YiIpFfOhT7A8OGwcaNa+yKSe3Iy9A89FM4/H+69F77+OupqRETSJydDH0Jr//vv4a67oq5ERCR9kgp9M+tuZkvMbJmZDS1hfX8zm29mc83sbTNrE1+eZ2ab4svnmtn9qf4Dyuuww6BXLxg7Ftavj7oaEZH0KDX0zaw6MA44DWgD9CkK9QSPu3s7d+8A3AbcmbBuubt3iN/6p6rwVBg+HL77DsaMiboSEZH0SKal3xlY5u4r3H0rMA3ombiBu3+bcHdvwFNXYuVp1w7OPRfuvhs2bIi6GhGRypdM6DcFViXcXx1ftgszG2Bmywkt/UEJq1qa2Rwze8PMji3pBcysn5nlm1l+QUFBGcqvuOuvh2+/DcEvIpLtUnYg193HufuBwB+BYfHFXwAt3L0jcDXwuJn9qITHTnT3mLvHGjVqlKqSknL44XD22aGL55tv0vrSIiJpl0zorwGaJ9xvFl+2O9OAswHcfYu7fx3/fTawHDi4fKVWnuHDQ+CP
HRt1JSIilSuZ0J8FtDazlmZWE+gNzEjcwMxaJ9w9A1gaX94ofiAYM2sFtAZWpKLwVOrUCc46Kwzf/Pbb0rcXEamqSg19dy8EBgIzgY+BJ919oZmNNLMe8c0GmtlCM5tL6MbpG19+HPBRfPlTQH93z8gBkiNGhIO599wTdSUiIpXH3DNroE0sFvP8/PxIXvvMM+Ff/4KVK2GffSIpQUSkXMxstrvHStsuZ8/ILcn114cTtcaNi7oSEZHKodBP0LkzdO8Oo0fDf/8bdTUiIqmn0C9mxIgwCdv48VFXIiKSegr9Yo46Crp1C63977+PuhoRkdRS6JdgxAgoKAgXURcRySYK/RJ06QInnwy33RYutiIiki0U+rsxYkS4ePqECVFXIiKSOgr93Tj2WDjxxNDa37Qp6mpERFJDob8HI0bAl1/CAw9EXYmISGoo9Pfg+OPhuOPg1lth8+aoqxERqTiFfilGjIDPP4dJk6KuRESk4hT6pTjxRDjmGBg1CrZsiboaEZGKUeiXwiy09tesgcmTo65GRKRiFPpJOPlkOPpo+Mtf1NoXkapNoZ+Eotb+qlXw8MNRVyMiUn4K/SR16wY/+xnccgts3Rp1NSIi5aPQT1JRa/+zz+CRR6KuRkSkfBT6ZdC9Oxx5ZGjtb9sWdTUiImWn0C8Ds3B1rZUrYcqUqKsRESk7hX4ZnXEGHHEE3HyzWvsiUvUo9MuoqLW/YgU89ljU1YiIlI1CvxzOOgs6dAit/cLCqKsREUmeQr8cilr7y5bB1KlRVyMikrykQt/MupvZEjNbZmZDS1jf38zmm9lcM3vbzNokrLs2/rglZvbzVBYfpZ49oX17uOkm2L496mpERJJTauibWXVgHHAa0AbokxjqcY+7ezt37wDcBtwZf2wboDdwGNAdGB9/viqvWrXQ2v/3v2HatKirERFJTjIt/c7AMndf4e5bgWlAz8QN3P3bhLt7Ax7/vScwzd23uPsnwLL482WFc86Btm3hxhvV2heRqiGZ0G8KrEq4vzq+bBdmNsDMlhNa+oPK+Nh+ZpZvZvkFBQXJ1h65atVg+HBYsgSmT4+6GhGR0qXsQK67j3P3A4E/AsPK+NiJ7h5z91ijRo1SVVJa9OoFbdqE1v6OHVFXIyKyZ8mE/hqgecL9ZvFluzMNOLucj61yilr7ixbBU09FXY2IyJ4lE/qzgNZm1tLMahIOzM5I3MDMWifcPQNYGv99BtDbzGqZWUugNfBBxcvOLL/4Bfz0pzBypFr7IpLZSg19dy8EBgIzgY+BJ919oZmNNLMe8c0GmtlCM5sLXA30jT92IfAksAh4CRjg7ll3yLN6dRg2DBYuhGeeiboaEZHdM3cvfas0isVinp+fH3UZZbZ9e+jbr10b5swJ3T4iIuliZrPdPVbadoqmFClq7X/0ETz7bNTViIiUTKGfQn36wEEHhb79DPsCJSICKPRTqkaN0NqfOxdefDHqakREfkihn2J9+kCTJjBmTNSViIj8kEI/xWrWhAED4OWXw2geEZFMotCvBP36hVE8Y8dGXYmIyK4U+pWgYUO46CJ49FH4+uuoqxER2UmhX0kGD4bNm2HixKgrERHZSaFfSdq2hVNOgXvv1QXURSRzKPQr0ZAh8PnnmohNRDKHQr8SnXYatG4Nd98ddSUiIoFCvxJVqwaDBsH778N770VdjYiIQr/S/epXsO++OllLRDKDQr+S1asHV1wR+vVXrSp9exGRyqTQT4OBA8MEbOPHR12JiOQ6hX4a5OXB2WfDhAmwcWPU1YhILlPop8mQIbBhA0yZEnUlIpLLFPppcswx0KlTOKCr6+iKSFQU+mliFlr7ixeHGThFRKKg0E+j88+Hxo11spaIREehn0a1asGVV4arai1eHHU1IpKLFPpp1r9/uNCK5toXkSgo9NPsJz+BCy+ERx6B9eujrkZEck1SoW9m3c1siZktM7OhJay/2swWmdlHZvaKmR2QsG67mc2N32aksviqavDgMF5/
0qSoKxGRXFNq6JtZdWAccBrQBuhjZm2KbTYHiLl7e+Ap4LaEdZvcvUP81iNFdVdphx8OJ54Y5tovLIy6GhHJJcm09DsDy9x9hbtvBaYBPRM3cPfX3L3oXNP3gGapLTP7DB4c5uJ55pmoKxGRXJJM6DcFEqcKWx1ftjuXAy8m3K9tZvlm9p6ZnV3SA8ysX3yb/IKCgiRKqvrOPBNatdLsmyKSXik9kGtmFwEx4PaExQe4ewy4ABhjZgcWf5y7T3T3mLvHGjVqlMqSMlb16mGu/XffhVmzoq5GRHJFMqG/BmiecL9ZfNkuzOwU4Dqgh7tvKVru7mviP1cArwMdK1BvVrn0UthnH52sJSLpk0zozwJam1lLM6sJ9AZ2GYVjZh2BCYTAX5uwvL6Z1Yr/3hDoCixKVfFV3Y9+BJdfDk88Ea6lKyJS2UoNfXcvBAYCM4GPgSfdfaGZjTSzotE4twP1gOnFhmYeCuSb2TzgNWCUuyv0E1x1FWzfrrn2RSQ9zN2jrmEXsVjM8/Pzoy4jrc4+G95+O4zmqVMn6mpEpCoys9nx46d7pDNyM8CQIfD11/DYY1FXIiLZTqGfAY4/Ppywdffd4bKKIiKVRaGfAYrm2l+wAF59NepqRCSbKfQzRO/e0KiRTtYSkcql0M8QtWuHufaffx6WLo26GhHJVgr9DHLllbDXXpprX0Qqj0I/gzRuDH36wEMPwX/+E3U1IpKNFPoZZvBg+P57mDw56kpEJBsp9DNMp05w7LGhi0dz7YtIqin0M9CQIfDppzBD1xkTkRRT6Gegnj0hL0+zb4pI6in0M1D16mEitjffhA8/jLoaEckmCv0MdfnlUK+eWvsikloK/Qy1777wq1/B1Knw5ZdRVyMi2UKhn8EGDYJt2+C++6KuRESyhUI/g7VuHS6gfv/9sHlz1NWISDZQ6Ge4IUNg7VqYNi3qSkQkGyj0M9xJJ0HbtmH2Tc21LyIVpdDPcGZhaoZ58+CNN6KuRkSqOoV+FXDhhdCggebaF5GKU+hXAXXqQP/+YVqGFSuirkZEqjKFfhXxv/8bztS9556oKxGRqkyhX0U0aQK//CU8+CB8+23U1YhIVZVU6JtZdzNbYmbLzGxoCeuvNrNFZvaRmb1iZgckrOtrZkvjt76pLD7XDB4M330XLrIiIlIepYa+mVUHxgGnAW2APmbWpthmc4CYu7cHngJuiz92P2AE8DOgMzDCzOqnrvzccuSR0KVL6OLZvj3qakq3ZQuMGAGXXQbr10ddjYhAci39zsAyd1/h7luBaUDPxA3c/TV33xi/+x7QLP77z4GX3X29u28AXga6p6b03DRkCCxfDi+8EHUlezZvHnTuDCNHwqOPwuGHa8ipSCZIJvSbAqsS7q+OL9udy4EXy/JYM+tnZvlmll9QUJBESbnrnHOgefPMHb5ZWAg33xy+lXz1FTz7LLz/fhiBdNJJcP31uiKYSJRSeiDXzC4CYsDtZXmcu09095i7xxo1apTKkrJOjRphrv3XXgut6UyyeDF07QrDhoWd04IF0KMHHHFEuC7AJZfAjTfC8ceHK4OJSPolE/prgOYJ95vFl+3CzE4BrgN6uPuWsjxWyuaKK6Bu3cyZa3/HjvDNo2NHWLYszBP0xBPQsOHOberVCwegH3sM5s8P3T3Tp0dXs0iuSib0ZwGtzaylmdUEegO7XL3VzDoCEwiBvzZh1Uygm5nVjx/A7RZfJhVQvz707QuPPx4mY4vSJ5+Ebpvf/hZOPjm07n/5y91vf8EFMHcuHHIInH8+9OsH33+fvnpFcl2poe/uhcBAQlh/DDzp7gvNbKSZ9YhvdjtQD5huZnPNbEb8seuBGwk7jlnAyPgyqaBBg8LomAkTonl9d5g4Edq3D103kyfDc8/B/vuX/thWreDtt2HoUJg0CWKxzOuqEslW5hk2dWMsFvP8/Pyoy6gSTj8d5syBlSuhVq30ve6aNaGL6aWXQit/8mQ44IDSH1eSV16Biy8OQzpv
vx0GDgyTzIlI2ZjZbHePlbadzsitwoYMCZdSfPLJ9Lyee+iTb9s2DL+85x54+eXyBz6ELqF58+CUU8K3l549Yd261NUsIrtS6Fdhp54Khx6anrn2CwqgVy+46KLwmvPmhVZ5tRR8gho1Cl1Dd98NM2eGLqNXX63484rIDyn0q7CiufY//BDeeafyXueZZ+Cww+D55+HWW+Gtt8KlHFPJLLT0338/XBT+lFPguuvCNYJFJHUU+lXcxReH0TyVcbLWhg3h+c89F5o1g9mz4Q9/CLN9VpYOHSA/Hy6/HG65BY49NowQEpHUUOhXcXXrwm9+E1rjK1em7nlnzoR27WDq1HAW7Xvvhb78dNh7b3jggTDWf/HisCOYOjU9ry2S7RT6WWDAgNA9cu+9FX+u//43XLCle3f40Y9C2P/5z1CzZsWfu6zOPz+M6T/ssDC+/7LLQn0iUn4K/SzQrFk4yDppUsVC8c03w0HUiRPhd78LxwpipQ4Aq1x5eaGuYcPg4Yd3TukgIuWj0M8SQ4bAN9/AI4+U/bGbNsHVV8MJJ4RvDG+8EcbM166d8jLLpUaNMGfPq6+Gs3ePOgruuitM/yAiZaPQzxJHHQU/+1kY9liWMJw1Czp1CiHav38YinnssZVXZ0WccEKo7/TTw07qzDOjn4ZCpKpR6GeRIUNg6VJ48cXSt926FYYPh6OPDl1CM2fC+PFhYrRM1qBBOGg9blxo+bdvH04QE5HkKPSzyHnnQdOmpQ/fnD8/fCu46aZwstX8+dCtW3pqTAWzcKH4WbPCTqBbtzCUdOvWqCsTyXwK/Syy115hJM8//wkLF/5wfWEhjBoVDoZ+/nloMT/8MPz4x2kvNSXatQvB379/OAbRtWuY2llEdk+hn2X69QsHYIvPtf/vf4e++muvDRc2WbAAzj47mhpTqW5duO8++NvfQuB37Ah//WvUVYlkLoV+lmnQIFyhasqUMHHZjh0wdmw4wWnJkjAH//TpYb6bbHLuueEgb8eO4SziSy6B776LuiqRzKPQz0KDB8PmzXDDDWEOm8GDw8iXBQugT5/snbq4RYtwcPeGG8JsoB07hu4fEdlJoZ+F2rQJM3COGxdCb9IkeOEFaNIk6soqX40aMGJEONdg61bo0iX092tMv0ig0M9St90WJi2bPz/8zNbW/e4cc0zo7unZM4zs6d49XHtAJNcp9LNUhw6hhZ+XF3Ul0alfPxy/mDAhXJ6xW7fQ7SWSyxT6ktXMwoim6dPDt55hw6KuSCRaCn3JCWecAVdeCXfcoatySW5T6EvOGD0aDjkE+vYNF4gRyUUKfckZdeuGE7e+/DK0+iv7usIimSip0Dez7ma2xMyWmdnQEtYfZ2YfmlmhmfUqtm67mc2N32akqnCR8ojFwjj+J54IJ6qJ5JpSQ9/MqgPjgNOANkAfM2tTbLPPgF8BJf032uTuHeK3HhWsV6TChg4N8/QMGACffhp1NSLplUxLvzOwzN1XuPtWYBrQM3EDd1/p7h8BOgVGMl716mGaiu3bQ//+9u1RVySSPsmEflNgVcL91fFlyaptZvlm9p6ZZcEUX5INWraEe+4JZ+7ecUfU1YikTzoO5B7g7jHgAmCMmR1YfAMz6xffMeQXFBSkoSSR0Mo/77wwdn/OnKirEUmPZEJ/DdA84X6z+LKkuPua+M8VwOtAxxK2mejuMXePNcq26R8lY5mFs3UbNgwXk9m0KeqKRCpfMqE/C2htZi3NrCbQG0hqFI6Z1TezWvHfGwJdgUXlLVYk1Ro0CBeSWbQoHOAVyXalhr67FwIDgZnAx8CT7r7QzEaaWQ8AMzvSzFYDvwAmmFnRdZsOBfLNbB7wGjDK3RX6klG6dYNBg8J1B2bOjLoakcplnmFnqMRiMc/Pz4+6DMkxmzaFMfwbNoQ5eho0iLoikbIxs9nx46d7pDNyRYA6dcKFV9atCxO0ZVhbSCRlFPoicR06wE03wdNPwyOPRF2NSOVQ6IskuOYaOP54uOoq
WLEi6mpEUk+hL5KgenV49NHw85JLoLAw6opEUkuhL1JMixYwfjy88w7cemvU1YiklkJfpAQXXAC9e4cZOWfNiroakdRR6Ivsxvjx0LhxOFv3+++jrkYkNRT6IrtRv37o31+6FH7/+6irEUkNhb7IHpx4Ilx9Ndx3H7zwQtTViFScQl+kFDffDO3awWWXwdq1UVcjUjEKfZFS1KoVztb95hu44gqdrStVm0JfJAnt2sGoUfDcczBpUtTViJSfQl8kSYMGwcknw5Ah4eCuSFWk0BdJUrVqYe79WrXCMM5t26KuSKTsFPoiZdCsWbja1gcfhAO8IlWNQl+kjH7xizAvz003wb/+FXU1ImWj0Bcph7FjQ6v/4ovhu++irkYkeQp9kXLYd1+YMiVMv/zb30ZdjUjyFPoi5XTsseFi6g8+CH//e9TViCRHoS9SATfcAJ06hZO2vvgi6mqSM3cuXH89TJ8O334bdTWSbjWiLkCkKqtZE/761xD8l10G//gHmEVd1Q8VFsKzz4ZjEW++uXP5XnuFK4WddVa4tWwZXY2SHmrpi1TQoYfC6NHw0kthOuZM8vXX4UIwrVpBr16walWotaAA3norHI9YswYGDw7btG0L114L774L27dHXb1UBvMMm0gkFot5fn5+1GWIlIk7nH46vP46fPhh2BFEad48uOeeMGfQ5s3hTOJBg+CMM8KlIItbvjxMMfHcc+GbQGEhNGoUtj/rLOjWDerVS//fIckzs9nuHittu6Ra+mbW3cyWmNkyMxtawvrjzOxDMys0s17F1vU1s6XxW9/k/wSRqsMMJk+GvfcOZ+tu3Zr+GgoL4emn4YQToEMHePxx6NsX5s+Hf/4TevQoOfABDjwwTC/xyivhW8DUqXDqqaFL6LzzoEED6N4dxo2Dzz5L658lqebue7wB1YHlQCugJjAPaFNsmzygPfAo0Cth+X7AivjP+vHf6+/p9Y444ggXqaqeftod3K+9Nn2vuW6d+6hR7i1ahNfOy3MfPdp9/fqKP/e2be6vv+5+zTXuBx8cnh/c27d3v+469/fec9++veKvIxUH5Hspee7uSbX0OwPL3H2Fu28FpgE9i+04Vrr7R8COYo/9OfCyu6939w3Ay0D3su2WRKqOc86Byy8PM3K+9VblvtZHH8Gvfx1OEhs6FA46KAwdXbYMrrkmXPmromrUCAd6R4+GJUvCbfTo8NyjRsFRR0GTJuFv/vvfdVnJqiCZ0G8KrEq4vzq+LBkVeaxIlTRmTDgoevHFYQ7+VCoshGeeCVf0Ovzw0Gd/ySWhC+eVV6Bnz9134aTCwQeHHcrrr4cLyjz2WKjlb38LO7wGDcJxgPvvh9WrK68OKb+MGL1jZv3MLN/M8gsKCqIuR6RC6tULZ+uuWhUOnqbC+vVw++2hNX/uueFM4NtuC8E6YUIYdZNu++0HF1wQ+v8LCsJO58orw7eBK6+E5s3DUNYRIyA/H3YU7weQSCQzTn8N0DzhfrP4smSsAU4o9tjXi2/k7hOBiRBG7yT53CIZ6+ijYdgwGDkSzjwzTNJWHgsWhFE4U6bApk2hVX3XXWFETY0MOstmr73gpJPC7c47YfHinaOBbropvA/77x/ei7POCqOJ6tat+Ou6h9FJGzeG92fjxt3/vqf1tWuHrqpjjgnfZjLxXItUKXXIppnVAP4NnEwI8VnABe6+sIRtHwaed/en4vf3A2YDneKbfAgc4e7rd/d6GrIp2WLbthAiS5eG7pemSXZsbt8ewnLsWHjttRBIF18MV10VruBV1axbBy++GP6ml14KE9TVqQOnnBJ2EmZlC+jiP8ujRo2w06lbN9Tyn//Ahg1hXcOG0KULdO0a/v2OOCJcQyHTJTtkM6lx+mZ2OjCGMJJnsrvfbGYjCUeLZ5jZkcAzhBE6m4Ev3f2w+GMvA/4Uf6qb3f2hPb2WQl+yydKlYfhkly4wc2a4EMvubNgQ5vEZNw5WroQWLWDAgHCQtEGDtJVcqbZuhTfe
2PktYOXKnevMdoZw4s9ULSv6vU6d8M0k0Y4doVvqnXd23oqujlarFsRiO3cCXbpk5r9HSkM/nRT6km0mToTf/CZ0ywwZ8sP1Cxfu7MLZuDGMlhk0KIyrz6QunFRzDweDa9YMQVyrVmZ1q3z1VTgzuWgnMHv2zqul/fSnYQfQtWu4HXRQ9LUr9EUyhHsYVfN//xcOaLZtG7pwnn8+dOG8+mrowrnootCF07591BVLSTZtglmzdu4E3n13Z5fQT36ycwfQtWs4gF2zZnrrU+iLZJC1a0N/fOPGYYjlvfeGro3mzUMXzhVXZGaXgezejh3w8ce7dgktXx7W1a4NnTvv3Al06ZKa8yb2RKEvkmFeeCGMXgE47rjQhdOzZ3Z34eSaL7/cdSfw4Yfh3AqAww7b9dtAq1ap7RJS6ItkoH/8I5zB2qFD1JVIOmzcCB98sGuXUNEJe40b77oT6NjxhweYy0KhLyKSYXbsCAfui3YCb7+9cwRTnTrhm9/UqeV77mRDX18sRUTSpFq1cGynXTvo3z8s+/zznTuBvfeu/BoU+iIiEWrSJJyxXd6ztssqI+beERGR9FDoi4jkEIW+iEgOUeiLiOQQhb6ISA5R6IuI5BCFvohIDlHoi4jkkIybhsHMCoBPo66jghoC66IuIoPo/diV3o+d9F7sqiLvxwHu3qi0jTIu9LOBmeUnMwdGrtD7sSu9HzvpvdhVOt4Pde+IiOQQhb6ISA5R6FeOiVEXkGH0fuxK78dOei92Venvh/r0RURyiFr6IiI5RKFfQWbW3MxeM7NFZrbQzAbHl+9nZi+b2dL4z0q+LHLmMLPqZjbHzJ6P329pZu+b2TIze8LMakZdY7qY2Y/N7CkzW2xmH5vZ0Tn+2fht/P/JAjObama1c+nzYWaTzWytmS1IWFbi58GCsfH35SMz65SKGhT6FVcIXOPubYCjgAFm1gYYCrzi7q2BV+L3c8Vg4OOE+7cCd7n7QcAG4PJIqorG3cBL7v5T4HDC+5KTnw0zawoMAmLu3haoDvQmtz4fDwPdiy3b3efhNKB1/NYPuC8lFbi7bim8Ac8CpwJLgP3jy/YHlkRdW5r+/mbxD+5JwPOAEU42qRFffzQwM+o60/Re7At8QvzYWcLyXP28HPzdAAACNklEQVRsNAVWAfsRrtr3PPDzXPt8AHnAgtI+D8AEoE9J21XkppZ+CplZHtAReB/4H3f/Ir7qS+B/Iior3cYAfwB2xO83AP7j7oXx+6sJ//lzQUugAHgo3t01ycz2Jkc/G+6+BhgNfAZ8AXwDzCZ3Px9Fdvd5KNpJFknJe6PQTxEzqwf8DRji7t8mrvOwm876YVJmdiaw1t1nR11LhqgBdALuc/eOwPcU68rJlc8GQLyvuidhZ9gE2JsfdnXktHR8HhT6KWBmexEC/zF3fzq++Csz2z++fn9gbVT1pVFXoIeZrQSmEbp47gZ+bGY14ts0A9ZEU17arQZWu/v78ftPEXYCufjZADgF+MTdC9x9G/A04TOTq5+PIrv7PKwBmidsl5L3RqFfQWZmwIPAx+5+Z8KqGUDf+O99CX39Wc3dr3X3Zu6eRzhA96q7Xwi8BvSKb5YT7wWAu38JrDKzQ+KLTgYWkYOfjbjPgKPMrG78/03R+5GTn48Eu/s8zAAuiY/iOQr4JqEbqNx0clYFmdkxwFvAfHb2Y/+J0K//JNCCMGvo+e6+PpIiI2BmJwC/c/czzawVoeW/HzAHuMjdt0RZX7qYWQdgElATWAFcSmhs5eRnw8z+DPySMOptDnAFoZ86Jz4fZjYVOIEwm+ZXwAjg75TweYjvGO8ldIFtBC519/wK16DQFxHJHereERHJIQp9EZEcotAXEckhCn0RkRyi0BcRySEKfRGRHKLQFxHJIQp9EZEc8v8A6sgU+nX73GIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Score returned by K_cluster_analysis (kept in CH_scores) against the cluster count K.\n",
    "plt.plot(Ks, np.array(CH_scores), 'b-')\n",
    "plt.xlabel(\"K (number of clusters)\")\n",
    "plt.ylabel(\"silhouette score\")"
   ]
  },
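  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "从上面的结果看，K=10 时轮廓系数（silhouette score）最高（约 0.39），并且随着 K 增大总体呈下降趋势。这个结果有些出乎意料，目前还不确定是否正确：MiniBatchKMeans 没有固定随机种子，每次运行的结果可能会有波动，需要固定 random_state 或多次运行后再确认。"
   ]
  }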
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7rc1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
